1. Décès du Covid-19 par tranche d'âge
2. Décès toutes causes : Focus sur 18-39 ans
3. Vaccination : Focus sur 18-39 ans
4. Excès de Mortalité : Focus sur 18-39 ans
5. Données relatives aux effets secondaires de la vaccination Covid-19
import pandas as pd
import numpy as np
import re
import requests, zipfile, io
import time
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import datetime
def month_number_to_name(month_num):
    """Return the full month name for a month number.

    The value is converted to text, parsed with the ``%m`` directive and
    rendered with ``%B``; the spelling of the name therefore follows the
    active locale. Values that ``%m`` cannot parse raise ``ValueError``.
    """
    parsed_month = datetime.datetime.strptime(f"{month_num}", "%m")
    return parsed_month.strftime("%B")
def time_format(df, columns=('JNAIS', 'JDEC', 'MNAIS', 'MDEC')):
    """Make day/month columns usable for building dates.

    In each listed column, the value 0 and missing values are replaced
    by 1 (e.g. a date recorded as 2021/03/00 becomes 2021/03/01).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the listed columns. It is modified IN PLACE; pass a
        copy if the caller's frame must stay untouched.
    columns : iterable of str
        Names of the columns to repair. The default is an immutable tuple
        so the default value cannot be altered between calls.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # fix wrong dates like 2021/03/00
    for column in columns:
        df[column] = df[column].replace(0, 1).fillna(1)
    return df
def get_zipfile(url):
    """Download a zip archive and open it directly from memory.

    Parameters
    ----------
    url : str
        Address of the zip file.

    Returns
    -------
    zipfile.ZipFile
        Archive backed by an in-memory buffer (nothing is written to disk).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    zipfile.BadZipFile
        If the downloaded payload is not a zip archive.
    """
    # download zipfile to memory
    reponse = requests.get(url)
    # fail with the real HTTP error instead of trying to unzip an error page
    reponse.raise_for_status()
    # one-second pause after the request (presumably to stay polite with the
    # remote server -- confirm before removing)
    time.sleep(1)
    return zipfile.ZipFile(io.BytesIO(reponse.content))
def get_last_update_date():
    """Find the identifier of the latest INSEE detailed-deaths archive.

    The INSEE statistics page is fetched and searched for a link of the
    form ``4487988/<something>_detail``; ``<something>`` is returned as the
    last update date.

    Returns
    -------
    tuple of (str, str)
        ``last_update_date``: the text captured between ``4487988/`` and
        ``_detail``.
        ``in_progress_month``: its first seven characters (assumed to be
        ``YYYY-MM`` -- depends on the page keeping an ISO-like date there).

    Raises
    ------
    requests.HTTPError
        If the page cannot be fetched successfully.
    ValueError
        If no matching link is present in the page.
    """
    # get latest total deaths files from insee
    response = requests.get('https://www.insee.fr/fr/statistiques/4487988?sommaire=4487854')
    response.raise_for_status()
    time.sleep(1)
    match = re.search(r'4487988/(.*)_detail', response.text)
    if match is None:
        # the page layout changed or the download link is missing
        raise ValueError("could not find a '4487988/<date>_detail' link in the INSEE page")
    last_update_date = match.group(1)
    in_progress_month = last_update_date[0:7]
    # last_update_date: last update date
    # in_progress_month: current month
    return last_update_date, in_progress_month
def age_and_format(df, age_groups, death_places, in_progress_month):
    """Compute age at death, attach age group and death place, tidy columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw death records. The code reads the columns ADEC/MDEC/JDEC (death
        year/month/day), ANAIS/MNAIS/JNAIS (birth year/month/day), SEXE and
        DEPDEC. The frame is copied first, so the caller's data is untouched.
    age_groups : pandas.DataFrame
        Table with an ``age`` column (lower bound of each group, sorted) and
        an ``age_group`` label column; matched with a backward as-of merge.
    death_places : pandas.DataFrame
        Table joined on the columns it shares with the records, and which
        must provide ``death_place``. Records without a match are dropped
        (inner merge).
    in_progress_month : str
        ``YYYY-MM`` month to exclude from the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``death_year``, ``death_month``, ``gender``, ``death_place``,
        ``death_year_month``, ``age``, ``age_group``, sorted by
        ``death_year_month``, without the rows of ``in_progress_month``.
    """
    df_calculated = time_format(df.copy())
    # format dates
    death_date = pd.to_datetime(dict(year=df_calculated.ADEC,
                                     month=df_calculated.MDEC,
                                     day=df_calculated.JDEC))
    birth_date = pd.to_datetime(dict(year=df_calculated.ANAIS,
                                     month=df_calculated.MNAIS,
                                     day=df_calculated.JNAIS))
    df_calculated['death_year_month'] = (df_calculated.ADEC.astype(str)
                                         + '-'
                                         + df_calculated.MDEC.astype(str).str.zfill(2))
    df_calculated.drop(columns=['JDEC', 'ANAIS', 'MNAIS', 'JNAIS', "DEPDEC"], inplace=True)
    # Age in completed years. Dividing a day count by 365.25 under-counts on
    # some exact birthdays (2001-01-01 -> 2019-01-01 gave 17), so compare the
    # calendar fields instead: subtract one year when the birthday has not
    # yet been reached in the year of death.
    birthday_not_reached = (
        (death_date.dt.month < birth_date.dt.month)
        | ((death_date.dt.month == birth_date.dt.month)
           & (death_date.dt.day < birth_date.dt.day))
    )
    # cast to int64 so the key dtype matches an integer ``age_groups.age``
    df_calculated['age'] = (death_date.dt.year
                            - birth_date.dt.year
                            - birthday_not_reached.astype(np.int64)).astype(np.int64)
    # add age groups (as-of merge needs the left side sorted on the key)
    df_calculated = pd.merge_asof(df_calculated.sort_values('age'), age_groups, on='age')
    df_calculated = df_calculated.merge(death_places)[
        ['ADEC', 'MDEC', 'SEXE', 'death_place', 'death_year_month', 'age', 'age_group']
    ]
    # rename columns
    df_calculated.columns = ['death_year', "death_month", 'gender', 'death_place',
                             'death_year_month', 'age', 'age_group']
    # remove current month
    finished_months = df_calculated['death_year_month'] != in_progress_month
    return df_calculated[finished_months].sort_values('death_year_month')
# all deaths: places format (raw LIEUDEC2 code -> readable label)
death_places = pd.DataFrame({
    'LIEUDEC2': ['HopCli', 'HosMar', 'Logem', 'Autres', 'Non renseigne'],
    'death_place': ['hospital', 'nursing home', 'home', 'street or any public place', 'unknown'],
})
# all deaths: age group format
# 'age' is the lower bound of each group; the group starting at 12 ends at 17
# because the next one starts at 18 (it was previously mislabelled '12->19').
age_groups = pd.DataFrame({
    'age': [0, 1, 12, 18, 40, 50, 60, 70, 80, 90],
    'age_group': ['0->1', '1->11', '12->17', '18->39', '40->49', '50->59', '60->69', '70->79', '80->89', '90+'],
})
# covid deaths: age group format (cl_age90 code -> ten-year class label)
cl_age90 = pd.DataFrame({
    'cl_age90': [9, 19, 29, 39, 49, 59, 69, 79, 89, 90],
    'age_group': ['0->9', '10->19', '20->29', '30->39', '40->49', '50->59', '60->69', '70->79', '80->89', '90+'],
})
# read raw data from sante publique france
# (the inner merge keeps only the rows whose cl_age90 code is listed in the
# cl_age90 lookup table and attaches the readable age_group label)
raw_daily_deaths = pd.read_csv("https://www.data.gouv.fr/fr/datasets/r/08c18e08-6780-452d-9b8c-ae244ad529b3",
                               sep=";",
                               usecols=['reg', 'cl_age90', 'jour', 'dc']).merge(cl_age90)
# add Year-Month Column
raw_daily_deaths['death_year_month'] = raw_daily_deaths['jour'].str[0:7]
# sum 'dc' over regions for every (age group, day)
calulated_daily_deaths = raw_daily_deaths.groupby(['age_group', 'death_year_month', 'jour'])['dc'].sum().reset_index()
# keep, for every (age group, month), the single day with the highest total
# (ties broken by the latest day): rows are sorted in descending order and
# drop_duplicates keeps the first one
calulated_daily_deaths = calulated_daily_deaths.sort_values(['dc', 'jour'], ascending=False)\
    .drop_duplicates(['age_group', 'death_year_month'])
calulated_daily_deaths.columns = ['age_group', 'death_year_month', 'day', 'cumulated_covid_deaths']
# filter on latest data and calculate percentage by age group
# .copy() makes this an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning
calulated_daily_deaths_latest = calulated_daily_deaths.query(f'day=="{calulated_daily_deaths.day.max()}"')\
    .sort_values('age_group')\
    .copy()
# label of the form "<count> <share>%", used as bar text in the plots
calulated_daily_deaths_latest['covid_deaths_weight'] = calulated_daily_deaths_latest['cumulated_covid_deaths'].astype(str) + " " +\
    round(calulated_daily_deaths_latest['cumulated_covid_deaths'] /
          calulated_daily_deaths_latest['cumulated_covid_deaths'].sum() * 100, 2).astype(str) + "%"
# covid deaths by age group: Bar Plot
# one bar per age group; the bar text comes from the covid_deaths_weight column
fig = px.bar(
    calulated_daily_deaths_latest,
    x='age_group',
    y='cumulated_covid_deaths',
    text="covid_deaths_weight",
)
fig.update_traces(textposition='outside')
fig.update_layout(
    title=dict(
        text=f"""<b>Covid Deaths at hospital by age group<br>From Mars 2020 To {calulated_daily_deaths.day.max()}<br>""",
        x=0.5,
        font_size=12,
    ),
    hovermode="x",
    plot_bgcolor='rgba(0,0,0,0)',
)
# axis titles
fig.update_xaxes(title_text="<b>age group</b>")
fig.update_yaxes(title_text="<b>number of deaths</b>")
fig.show()
# covid deaths by age group: Pie Chart
# lookup table pooling the four classes below 40 into a single '0->39' group
cl_age90_grouped = pd.DataFrame({
    'age_groups': ['0->39'] * 4 + ['40->49', '50->59', '60->69', '70->79', '80->89', '90+'],
    'age_group': ['0->9', '10->19', '20->29', '30->39', '40->49', '50->59', '60->69', '70->79', '80->89', '90+'],
})
# total deaths per pooled group
calulated_daily_deaths_latest_grouped = (
    calulated_daily_deaths_latest
    .merge(cl_age90_grouped)
    .groupby('age_groups')['cumulated_covid_deaths']
    .sum()
    .reset_index()
)
labels = calulated_daily_deaths_latest_grouped['age_groups']
values = calulated_daily_deaths_latest_grouped['cumulated_covid_deaths']
# sort=False keeps the slices in the order of the data instead of by size
fig = go.Figure(data=[go.Pie(labels=labels, values=values, textinfo='label+percent', sort=False)])
fig.update_layout(
    showlegend=False,
    title=dict(
        text=f"""<b>Covid Deaths at hospital by age group<br>From Mars 2020 To {calulated_daily_deaths.day.max()}<br>""",
        x=0.5,
        y=0.96,
        font_size=12,
    ),
    hovermode="x",
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()
# locate and download the latest detailed-deaths archive from insee
last_update_date, in_progress_month = get_last_update_date()
zip_file = get_zipfile(
    f"https://www.insee.fr/fr/statistiques/fichier/4487988/{last_update_date}_detail.zip"
)
# archive members to load
files = ['DC_2018_det.csv', 'DC_2019_det.csv', 'DC_20202021_det.csv']
# read every member into a DataFrame keyed by file name; missing values in
# every column are replaced by 1
all_deaths_by_year = {
    file: pd.read_csv(io.BytesIO(zip_file.read(file)), sep=";").fillna(1)
    for file in files
}
# stack the yearly frames under one fresh 0..n-1 index
all_deaths_all_years = pd.concat(list(all_deaths_by_year.values()), ignore_index=True)
# notebook preview of the first row
all_deaths_all_years.head(1)
| | ADEC | MDEC | JDEC | DEPDEC | COMDEC | ANAIS | MNAIS | JNAIS | SEXE | COMDOM | LIEUDEC2 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2018 | 1 | 1 | 974 | 97416 | 1925 | 10.0 | 20.0 | F | 97401 | HopCli |
# compute ages, age groups and readable death places for every record
raw_deaths = age_and_format(all_deaths_all_years, age_groups, death_places, in_progress_month)
# monthly number of deaths, restricted to the 18->39 age group,
# most recent month first
aggregated_deaths = (
    raw_deaths[raw_deaths['age_group'] == "18->39"]
    .groupby(['death_year', 'death_month', 'death_year_month', 'age_group'])['age']
    .count()
    .reset_index(name='number_of_deaths')
    .sort_values('death_year_month', ascending=False)
)
# Compare the years month by month (only months after April are plotted)
# number of deaths recorded for August 2021; also used to place annotations
deaths_202108 = aggregated_deaths.loc[
    aggregated_deaths['death_year_month'] == '2021-08', 'number_of_deaths'
].iloc[0]
fig = px.line(
    aggregated_deaths[aggregated_deaths['death_month'] > 4],
    x="death_month",
    y="number_of_deaths",
    color="death_year",
)
fig.update_layout(
    title=dict(
        text=f"""<b>Evolution of deaths year by year<br>age group: 18->39<br>""",
        x=0.5,
        font_size=12,
    ),
    hovermode="x",
    plot_bgcolor='rgba(0,0,0,0)',
    legend_title_text='Year',
)
# arrow placed between months 7 and 8, slightly above the August 2021 value
# NOTE(review): the "~920" figure in the text is hard-coded, not computed
fig.add_annotation(
    x=7.5,
    y=deaths_202108 + 5,
    text=f"<b>the 2 deadliest months since 2018</b><br> with <b>~920 deaths each</b>",
    showarrow=True,
    ax=-45,
    ay=-45,
    arrowhead=2,
)
# axis titles; 'linear' tick mode shows a tick for every month number
fig.update_xaxes(title_text="<b>Month</b>", tickmode='linear')
fig.update_yaxes(title_text="<b>number of deaths 18->39</b>")
fig.show()
# calculate evol vs M-1 (relative change against the previous month)
evol_vs_prev_month = aggregated_deaths.sort_values(['death_year_month'])
evol_vs_prev_month['evol_vs_prev_month'] = evol_vs_prev_month.groupby(['age_group'])['number_of_deaths'].pct_change(1)
# calculate evol vs previous years: keep only august
evol_vs_previous_years = aggregated_deaths.query('death_month in [8]').sort_values('death_year_month')
# Calculate Excess Mortality
# Monthly cumulated covid deaths for the 20->29 and 30->39 classes. Only the
# numeric column is summed: summing the whole frame would also try to "add"
# the text columns (age_group, day).
# NOTE(review): these classes cover 20->39 while all-cause deaths cover
# 18->39 -- the covid source used here has no 18->19 class.
covid_death_by_month = calulated_daily_deaths.query('age_group in ["20->29","30->39"]')\
    .groupby(['death_year_month'])['cumulated_covid_deaths'].sum().reset_index()
# deaths within each month = difference between consecutive cumulated values
# NOTE(review): the first month has no previous value and is filled with a
# hard-coded 18 (presumably that month's own count -- TODO confirm)
covid_death_by_month['deaths_by_month'] = covid_death_by_month.cumulated_covid_deaths.diff(periods=1).fillna(18)
excess_mortality = {'average_deaths_2018_2019': round(evol_vs_previous_years.query('death_year_month in ["2018-08","2019-08"]').number_of_deaths.mean(), 0),
                    'covid_deaths_2021': covid_death_by_month.query('death_year_month in ["2021-08"]').deaths_by_month.mean(),
                    'total_deaths_2021': evol_vs_previous_years.query('death_year_month in ["2021-08"]').number_of_deaths.mean()
                    }
# August deaths of the 18->39 group, one point per year, value printed on top
fig = go.Figure(data=[
    go.Scatter(
        x=evol_vs_previous_years.death_year_month,
        y=evol_vs_previous_years.number_of_deaths,
        mode="lines+text",
        name="Lines and Text",
        text=evol_vs_previous_years.number_of_deaths,
        textposition="top center",
    )
])
# relative increase of August 2021 over the 2018-2019 August average, as "x.xx%"
excess_rate = round(
    (excess_mortality['total_deaths_2021'] / excess_mortality['average_deaths_2018_2019'] - 1) * 100,
    2,
).astype(str) + "%"
# axis titles; months are treated as categories, not as dates
fig.update_xaxes(title_text="<b>Month</b>", type='category')
fig.update_yaxes(title_text="<b>number of deaths 18->39</b>")
fig.update_layout(
    title=dict(
        text=f"""<b>Evolution of August deaths by year<br>age group: 18->39<br>""",
        x=0.5,
        font_size=12,
    ),
    plot_bgcolor='rgba(0,0,0,0)',
)
# arrow pointing at the August 2021 value
fig.add_annotation(
    x=2.9,
    y=deaths_202108,
    text=f"Mortality Increase: <b>{excess_rate}</b><br>vs average August 2018-2019",
    showarrow=True,
    ax=-70,
    ay=-45,
    arrowhead=2,
)
fig.show()
# call Ameli API
# NOTE(review): the query asks for rows=1000; presumably results beyond that
# are not returned -- confirm the dataset still fits
response = requests.get('https://datavaccin-covid.ameli.fr/api/records/1.0/search/?dataset=donnees-vaccination-par-tranche-dage-type-de-vaccin-et-departement&q=&rows=1000&facet=date_reference&facet=semaine_injection&facet=region_residence&facet=libelle_region&facet=departement_residence&facet=libelle_departement&facet=classe_age&facet=libelle_classe_age&facet=type_vaccin&refine.type_vaccin=Tout+vaccin&refine.libelle_region=FRANCE&exclude.classe_age=TOUT_AGE')
# stop here on an HTTP error instead of failing later on an unexpected payload
response.raise_for_status()
# convert json to Df, keep the 18-24 and 25-39 classes only
# .copy() makes the filtered frame independent, so adding a column below does
# not trigger pandas' SettingWithCopyWarning
weekly_vaccination_progress = pd.json_normalize(response.json()['records'])\
    [['fields.date', 'fields.classe_age', 'fields.taux_cumu_termine']]\
    .sort_values(['fields.date', 'fields.classe_age']).fillna(0)\
    .query('`fields.classe_age`in ["18-24","25-39"]')\
    .copy()
# aggregate vaccination data by month (from day to month)
weekly_vaccination_progress['month_vaccination'] = weekly_vaccination_progress['fields.date'].str[0:7]
weekly_vaccination_progress.columns = ['date_vaccination', 'age_group', 'progress_vaccination', 'month_vaccination']
# highest cumulated rate reached in each month, per age class
monthly_vaccination_progress = weekly_vaccination_progress.groupby(['age_group', 'month_vaccination'])\
    ['progress_vaccination'].max().reset_index()
# merge both classes under one '18->39' label, keeping the higher of the two
# monthly rates (a maximum, not a population-weighted average)
monthly_vaccination_progress['age_group'] = "18->39"
monthly_vaccination_progress = monthly_vaccination_progress.groupby(['age_group', 'month_vaccination'])\
    ['progress_vaccination'].max().reset_index()
# Line chart of the monthly vaccination rate
fig = px.line(
    monthly_vaccination_progress,
    x="month_vaccination",
    y="progress_vaccination",
    color="age_group",
)
fig.update_layout(
    title=dict(
        text=f"""<b>Evolution of vaccination rates<br>age group: 18->39<br>""",
        x=0.5,
        font_size=12,
    ),
    # show the y values as whole percentages
    yaxis=dict(tickformat=".0%"),
    hovermode="x",
    plot_bgcolor='rgba(0,0,0,0)',
    legend_title_text='Age Group',
)
# Set x-axis title (months handled as categories)
fig.update_xaxes(title_text="<b>Month", type='category')
# Set y-axis title
fig.update_yaxes(title_text="<b>% of vaccinated 18->39")
fig.show()
# deaths of 18->39, May to August 2021
deaths_young_people = aggregated_deaths.query('death_year_month in ["2021-05","2021-06","2021-07","2021-08"]')\
    .sort_values('death_year_month')[['death_year_month', 'number_of_deaths']]
# vaccination rate of 18->39 over the same months
vaccine_young_people = monthly_vaccination_progress.query('age_group=="18->39" and month_vaccination in ["2021-05","2021-06","2021-07","2021-08"]')
# one row per month with both measures; the duplicated month key is dropped.
# `columns=` is used because passing the axis positionally (`.drop(name, 1)`)
# is deprecated and rejected by pandas 2.x
death_vax = deaths_young_people.merge(vaccine_young_people, left_on='death_year_month', right_on='month_vaccination')\
    .drop(columns='death_year_month')
/tmp/ipykernel_40012/923146716.py:1: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only
# correlation between monthly deaths and vaccination rate
# (computed on the rows of death_vax only)
correlation_coef = death_vax['number_of_deaths'].corr(death_vax['progress_vaccination'])
# Is there a correlation between vaccination rate and deaths?
# two curves sharing the x axis: deaths on the left axis, rate on the right
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=death_vax.month_vaccination,
        y=death_vax.number_of_deaths,
        name="deaths rate 18->39",
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=death_vax.month_vaccination,
        y=death_vax.progress_vaccination,
        name="vaccination rate 18->39",
    ),
    secondary_y=True,
)
fig.update_layout(
    title=dict(
        text=f"""<b>Evolution of vaccination and deaths rates<br>age group: 18->39</b>""",
        x=0.5,
        font_size=12,
    ),
    # right-hand axis shown as whole percentages
    yaxis2=dict(tickformat=".0%"),
    hovermode="x",
    plot_bgcolor='rgba(0,0,0,0)',
)
# Set x-axis title
fig.update_xaxes(title_text="date", type='category')
# Set y-axes titles
fig.update_yaxes(title_text="<b>deaths rate 18->39</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>vaccination rate 18->39</b>", secondary_y=True)
# NOTE(review): the wording "High" is fixed text, whatever the coefficient is
fig.add_annotation(
    x=1,
    y=900,
    text=f"<br>High Correlation Coefficient of <b>{round(correlation_coef,2)}",
    showarrow=False,
)
fig.show()
# waterfall steps: baseline, covid deaths, remaining (non-covid) difference
# between the 2021 total and the baseline, and the 2021 total
yaxis_data = [
    excess_mortality['average_deaths_2018_2019'],
    excess_mortality['covid_deaths_2021'],
    excess_mortality['total_deaths_2021'] - excess_mortality['covid_deaths_2021'] - excess_mortality['average_deaths_2018_2019'],
    excess_mortality['total_deaths_2021'],
]
fig = go.Figure(
    go.Waterfall(
        measure=["absolute", "relative", "relative", "total"],
        x=["Average Deaths<br> Aug. 18- Aug. 19", "Covid Deaths Aug. 2021", "Non-Covid Deaths Aug. 2021", "Total Deaths Aug. 2021"],
        textposition="outside",
        text=yaxis_data,
        y=yaxis_data,
    )
)
# fixed y range so the differences between the bars stay visible
fig.update_yaxes(title_text="<b>number of deaths 18->39</b>", range=[700, 950])
fig.update_layout(
    title=dict(
        text=f"""August 2021 Mortality vs average August 2018-2019<br>age group: 18->39
<br>Mortality Increase: <b>{excess_rate}</b>""",
        x=0.5,
        y=0.9,
        font_size=12,
    ),
    plot_bgcolor='rgba(0,0,0,0)',
)
# share of the increase made of the non-covid step
fig.add_annotation(
    x=1.5,
    y=750,
    text=f"""<b>{(yaxis_data[2]/(yaxis_data[2]+yaxis_data[1])):.0%} of the mortality increase is not due to covid</b>""",
    showarrow=False,
)
fig.show()
# reported side-effect cases, read from a CSV pinned to a specific commit
eudravigilence_data = pd.read_csv('https://github.com/farzelhaar/covid_analysis_france/blob/309d0cc8b5cfe7f20f3050bb7267feb36f52a96a/eudravigilence_vaccination_europe.csv?raw=true')
# totals per vaccine, reshaped to one row per (vaccine, column), with the
# 'Total' column left out
eudravigilence_data_aggregated = (
    eudravigilence_data
    .groupby('Vaccine')
    .sum()
    .reset_index()
    .melt(['Vaccine'])
    .query('variable!="Total"')
)
eudravigilence_data_aggregated.columns = ['vaccine_name', 'seriousness', 'cases_reported']
# horizontal bars, one colour per seriousness level
fig = px.bar(
    eudravigilence_data_aggregated,
    x='cases_reported',
    y='vaccine_name',
    text='cases_reported',
    orientation="h",
    color='seriousness',
)
fig.update_traces(textposition='auto')
# headline figure: cases flagged "Serious", expressed in millions
serious_side_effects = str(round(eudravigilence_data_aggregated.query('seriousness=="Serious"').cases_reported.sum()/1000000,2))+" Millions serious side effects reported in the EU"
fig.update_layout(
    title=dict(
        text=f"""<b>Number of Vaccines' side effects cases reported in the EU<br>{serious_side_effects}""",
        x=0.5,
        font_size=14,
    ),
    plot_bgcolor='rgba(0,0,0,0)',
)
# Set x-axis title
fig.update_xaxes(title_text="<b>Number of Serious Side Effects</b>")
# Set y-axes titles
fig.update_yaxes(title_text="<b>Vaccine Name</b>")
fig.show()